# -*- coding: utf-8 -*-
import scrapy
from ITcast.items import ItcastItem

class ItcastSpider(scrapy.Spider):
    """Spider that scrapes the entries listed on the page in ``start_urls``.

    For every ``div.li_txt`` node found under ``div.tea_con`` it yields one
    ``ItcastItem`` populated with ``name``, ``title`` and ``info`` fields.
    """

    # Spider name (required).
    name = 'itcast'
    # Crawl scope: the spider is only allowed to crawl within this domain.
    allowed_domains = ['itcast.cn']
    # URL list: the first batch of requests issued after start-up is taken
    # from this list.
    start_urls = ['http://www.itcast.cn/channel/teacher.shtml#apython']

    def parse(self, response):
        """Extract one item per matching node from the downloaded page.

        Args:
            response: The response object handed to the callback; only its
                ``xpath`` method is used here.

        Yields:
            ItcastItem: One item per ``div.li_txt`` node, with ``name`` taken
            from the first ``h3`` text node, ``title`` from the first ``h4``
            text node and ``info`` from the first ``p`` text node. A field
            whose text node is absent is set to an empty string.
        """
        node_list = response.xpath('//div[@class="tea_con"]//div[@class="li_txt"]')

        for node in node_list:
            # Create the item object used to store the extracted fields.
            item = ItcastItem()

            # extract_first() returns the first matched text as a string.
            # The default keeps a node with a missing tag from raising
            # IndexError, which would abort the generator and lose every
            # remaining item on the page.
            item['name'] = node.xpath('./h3/text()').extract_first(default='')
            item['title'] = node.xpath('./h4/text()').extract_first(default='')
            item['info'] = node.xpath('./p/text()').extract_first(default='')

            # Hand each extracted item to the item pipeline; execution then
            # resumes here and continues with the next node.
            yield item